1 package org.apache.lucene.facet;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collections;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29
30 import org.apache.lucene.analysis.MockAnalyzer;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.Field;
33 import org.apache.lucene.document.SortedDocValuesField;
34 import org.apache.lucene.document.StringField;
35 import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
36 import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
37 import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
38 import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
39 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
40 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
41 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
42 import org.apache.lucene.index.IndexReader;
43 import org.apache.lucene.index.IndexWriterConfig;
44 import org.apache.lucene.index.LeafReaderContext;
45 import org.apache.lucene.index.RandomIndexWriter;
46 import org.apache.lucene.index.Term;
47 import org.apache.lucene.search.BooleanClause;
48 import org.apache.lucene.search.BooleanClause.Occur;
49 import org.apache.lucene.search.BooleanQuery;
50 import org.apache.lucene.search.FilteredQuery;
51 import org.apache.lucene.search.IndexSearcher;
52 import org.apache.lucene.search.MatchAllDocsQuery;
53 import org.apache.lucene.search.Query;
54 import org.apache.lucene.search.RandomAccessWeight;
55 import org.apache.lucene.search.ScoreDoc;
56 import org.apache.lucene.search.SimpleCollector;
57 import org.apache.lucene.search.Sort;
58 import org.apache.lucene.search.SortField;
59 import org.apache.lucene.search.TermQuery;
60 import org.apache.lucene.search.TopDocs;
61 import org.apache.lucene.search.Weight;
62 import org.apache.lucene.store.Directory;
63 import org.apache.lucene.util.Bits;
64 import org.apache.lucene.util.BytesRef;
65 import org.apache.lucene.util.IOUtils;
66 import org.apache.lucene.util.InPlaceMergeSorter;
67 import org.apache.lucene.util.InfoStream;
68 import org.apache.lucene.util.TestUtil;
69
70 public class TestDrillSideways extends FacetTestCase {
71
72 public void testBasic() throws Exception {
73 Directory dir = newDirectory();
74 Directory taxoDir = newDirectory();
75
76
77
78 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
79
80 FacetsConfig config = new FacetsConfig();
81 config.setHierarchical("Publish Date", true);
82
83 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
84
85 Document doc = new Document();
86 doc.add(new FacetField("Author", "Bob"));
87 doc.add(new FacetField("Publish Date", "2010", "10", "15"));
88 writer.addDocument(config.build(taxoWriter, doc));
89
90 doc = new Document();
91 doc.add(new FacetField("Author", "Lisa"));
92 doc.add(new FacetField("Publish Date", "2010", "10", "20"));
93 writer.addDocument(config.build(taxoWriter, doc));
94
95 doc = new Document();
96 doc.add(new FacetField("Author", "Lisa"));
97 doc.add(new FacetField("Publish Date", "2012", "1", "1"));
98 writer.addDocument(config.build(taxoWriter, doc));
99
100 doc = new Document();
101 doc.add(new FacetField("Author", "Susan"));
102 doc.add(new FacetField("Publish Date", "2012", "1", "7"));
103 writer.addDocument(config.build(taxoWriter, doc));
104
105 doc = new Document();
106 doc.add(new FacetField("Author", "Frank"));
107 doc.add(new FacetField("Publish Date", "1999", "5", "5"));
108 writer.addDocument(config.build(taxoWriter, doc));
109
110
111 IndexSearcher searcher = newSearcher(writer.getReader());
112
113
114
115
116 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
117
118 DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
119
120
121
122
123 DrillDownQuery ddq = new DrillDownQuery(config);
124 ddq.add("Author", "Lisa");
125 DrillSidewaysResult r = ds.search(null, ddq, 10);
126 assertEquals(2, r.hits.totalHits);
127
128
129 assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
130
131
132
133
134 assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
135
136
137
138
139
140 ddq = new DrillDownQuery(config);
141 ddq.add("Author", "Lisa");
142 r = ds.search(null, ddq, 10);
143
144 assertEquals(2, r.hits.totalHits);
145
146
147 assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
148
149
150
151
152 assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
153
154
155
156 ddq = new DrillDownQuery(config);
157 ddq.add("Author", "Lisa");
158 ddq.add("Author", "Bob");
159 r = ds.search(null, ddq, 10);
160 assertEquals(3, r.hits.totalHits);
161
162
163 assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
164
165
166
167 assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
168
169 assertTrue(r.facets instanceof MultiFacets);
170 List<FacetResult> allResults = r.facets.getAllDims(10);
171 assertEquals(2, allResults.size());
172 assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", allResults.get(0).toString());
173 assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", allResults.get(1).toString());
174
175
176 ddq = new DrillDownQuery(config);
177 ddq.add("Author", "Lisa");
178 ddq.add("Publish Date", "2010");
179 r = ds.search(null, ddq, 10);
180 assertEquals(1, r.hits.totalHits);
181
182
183 assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
184
185
186 assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
187
188
189
190 ddq = new DrillDownQuery(config);
191
192
193 ddq.add("Author", "Lisa");
194 ddq.add("Publish Date", "2010");
195 ddq.add("Author", "Bob");
196 r = ds.search(null, ddq, 10);
197 assertEquals(2, r.hits.totalHits);
198
199
200 assertEquals("dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
201
202
203 assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
204
205
206 ddq = new DrillDownQuery(config);
207 ddq.add("Foobar", "Baz");
208 r = ds.search(null, ddq, 10);
209 assertEquals(0, r.hits.totalHits);
210 assertNull(r.facets.getTopChildren(10, "Publish Date"));
211 assertNull(r.facets.getTopChildren(10, "Foobar"));
212
213
214 ddq = new DrillDownQuery(config);
215 ddq.add("Author", "Lisa");
216 ddq.add("Author", "Tom");
217 r = ds.search(null, ddq, 10);
218 assertEquals(2, r.hits.totalHits);
219
220
221 assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
222
223
224
225 assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());
226
227
228
229 ddq = new DrillDownQuery(config);
230 ddq.add("Author", "Lisa");
231 ddq.add("Author", "Tom");
232 r = ds.search(null, ddq, 10);
233 assertEquals(2, r.hits.totalHits);
234
235
236 assertEquals("dim=Publish Date path=[] value=2 childCount=2\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
237
238
239 ddq = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz")));
240 ddq.add("Author", "Lisa");
241 r = ds.search(null, ddq, 10);
242
243 assertEquals(0, r.hits.totalHits);
244 assertNull(r.facets.getTopChildren(10, "Publish Date"));
245 assertNull(r.facets.getTopChildren(10, "Author"));
246 writer.close();
247 IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
248 }
249
250 public void testSometimesInvalidDrillDown() throws Exception {
251 Directory dir = newDirectory();
252 Directory taxoDir = newDirectory();
253 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
254
255
256
257 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
258
259 FacetsConfig config = new FacetsConfig();
260 config.setHierarchical("Publish Date", true);
261
262 Document doc = new Document();
263 doc.add(new FacetField("Author", "Bob"));
264 doc.add(new FacetField("Publish Date", "2010", "10", "15"));
265 writer.addDocument(config.build(taxoWriter, doc));
266
267 doc = new Document();
268 doc.add(new FacetField("Author", "Lisa"));
269 doc.add(new FacetField("Publish Date", "2010", "10", "20"));
270 writer.addDocument(config.build(taxoWriter, doc));
271
272 writer.commit();
273
274
275 doc = new Document();
276 doc.add(new FacetField("Foobar", "Lisa"));
277 doc.add(new FacetField("Publish Date", "2012", "1", "1"));
278 writer.addDocument(config.build(taxoWriter, doc));
279
280
281 IndexSearcher searcher = newSearcher(writer.getReader());
282
283
284
285
286 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
287
288 DrillDownQuery ddq = new DrillDownQuery(config);
289 ddq.add("Author", "Lisa");
290 DrillSidewaysResult r = new DrillSideways(searcher, config, taxoReader).search(null, ddq, 10);
291
292 assertEquals(1, r.hits.totalHits);
293
294
295 assertEquals("dim=Publish Date path=[] value=1 childCount=1\n 2010 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
296
297
298
299 assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());
300
301 writer.close();
302 IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
303 }
304
305 public void testMultipleRequestsPerDim() throws Exception {
306 Directory dir = newDirectory();
307 Directory taxoDir = newDirectory();
308 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
309
310
311
312 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
313
314 FacetsConfig config = new FacetsConfig();
315 config.setHierarchical("dim", true);
316
317 Document doc = new Document();
318 doc.add(new FacetField("dim", "a", "x"));
319 writer.addDocument(config.build(taxoWriter, doc));
320
321 doc = new Document();
322 doc.add(new FacetField("dim", "a", "y"));
323 writer.addDocument(config.build(taxoWriter, doc));
324
325 doc = new Document();
326 doc.add(new FacetField("dim", "a", "z"));
327 writer.addDocument(config.build(taxoWriter, doc));
328
329 doc = new Document();
330 doc.add(new FacetField("dim", "b"));
331 writer.addDocument(config.build(taxoWriter, doc));
332
333 doc = new Document();
334 doc.add(new FacetField("dim", "c"));
335 writer.addDocument(config.build(taxoWriter, doc));
336
337 doc = new Document();
338 doc.add(new FacetField("dim", "d"));
339 writer.addDocument(config.build(taxoWriter, doc));
340
341
342 IndexSearcher searcher = newSearcher(writer.getReader());
343
344
345
346
347 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
348
349 DrillDownQuery ddq = new DrillDownQuery(config);
350 ddq.add("dim", "a");
351 DrillSidewaysResult r = new DrillSideways(searcher, config, taxoReader).search(null, ddq, 10);
352
353 assertEquals(3, r.hits.totalHits);
354 assertEquals("dim=dim path=[] value=6 childCount=4\n a (3)\n b (1)\n c (1)\n d (1)\n", r.facets.getTopChildren(10, "dim").toString());
355 assertEquals("dim=dim path=[a] value=3 childCount=3\n x (1)\n y (1)\n z (1)\n", r.facets.getTopChildren(10, "dim", "a").toString());
356
357 writer.close();
358 IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
359 }
360
361 private static class Doc implements Comparable<Doc> {
362 String id;
363 String contentToken;
364
365 public Doc() {}
366
367
368
369 int[] dims;
370
371
372
373 int[] dims2;
374 boolean deleted;
375
376 @Override
377 public int compareTo(Doc other) {
378 return id.compareTo(other.id);
379 }
380 }
381
382 private double aChance, bChance, cChance;
383
384 private String randomContentToken(boolean isQuery) {
385 double d = random().nextDouble();
386 if (isQuery) {
387 if (d < 0.33) {
388 return "a";
389 } else if (d < 0.66) {
390 return "b";
391 } else {
392 return "c";
393 }
394 } else {
395 if (d <= aChance) {
396 return "a";
397 } else if (d < aChance + bChance) {
398 return "b";
399 } else {
400 return "c";
401 }
402 }
403 }
404
405 public void testRandom() throws Exception {
406
407 while (aChance == 0.0) {
408 aChance = random().nextDouble();
409 }
410 while (bChance == 0.0) {
411 bChance = random().nextDouble();
412 }
413 while (cChance == 0.0) {
414 cChance = random().nextDouble();
415 }
416
417
418
419 double sum = aChance + bChance + cChance;
420 aChance /= sum;
421 bChance /= sum;
422 cChance /= sum;
423
424 int numDims = TestUtil.nextInt(random(), 2, 5);
425
426 int numDocs = atLeast(3000);
427
428 if (VERBOSE) {
429 System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
430 }
431 String[][] dimValues = new String[numDims][];
432 int valueCount = 2;
433
434 for(int dim=0;dim<numDims;dim++) {
435 Set<String> values = new HashSet<>();
436 while (values.size() < valueCount) {
437 String s = TestUtil.randomRealisticUnicodeString(random());
438
439 if (s.length() > 0) {
440 values.add(s);
441 }
442 }
443 dimValues[dim] = values.toArray(new String[values.size()]);
444 valueCount *= 2;
445 }
446
447 List<Doc> docs = new ArrayList<>();
448 for(int i=0;i<numDocs;i++) {
449 Doc doc = new Doc();
450 doc.id = ""+i;
451 doc.contentToken = randomContentToken(false);
452 doc.dims = new int[numDims];
453 doc.dims2 = new int[numDims];
454 for(int dim=0;dim<numDims;dim++) {
455 if (random().nextInt(5) == 3) {
456
457 doc.dims[dim] = -1;
458 } else if (dimValues[dim].length <= 4) {
459 int dimUpto = 0;
460 doc.dims[dim] = dimValues[dim].length-1;
461 while (dimUpto < dimValues[dim].length) {
462 if (random().nextBoolean()) {
463 doc.dims[dim] = dimUpto;
464 break;
465 }
466 dimUpto++;
467 }
468 } else {
469 doc.dims[dim] = random().nextInt(dimValues[dim].length);
470 }
471
472 if (random().nextInt(5) == 3) {
473
474 doc.dims2[dim] = random().nextInt(dimValues[dim].length);
475 } else {
476 doc.dims2[dim] = -1;
477 }
478 }
479 docs.add(doc);
480 }
481
482 Directory d = newDirectory();
483 Directory td = newDirectory();
484
485 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
486 iwc.setInfoStream(InfoStream.NO_OUTPUT);
487 RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
488 DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
489 FacetsConfig config = new FacetsConfig();
490 for(int i=0;i<numDims;i++) {
491 config.setMultiValued("dim"+i, true);
492 }
493
494 boolean doUseDV = random().nextBoolean();
495
496 for(Doc rawDoc : docs) {
497 Document doc = new Document();
498 doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
499 doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
500 doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
501
502 if (VERBOSE) {
503 System.out.println(" doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
504 }
505 for(int dim=0;dim<numDims;dim++) {
506 int dimValue = rawDoc.dims[dim];
507 if (dimValue != -1) {
508 if (doUseDV) {
509 doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
510 } else {
511 doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
512 }
513 doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
514 if (VERBOSE) {
515 System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
516 }
517 }
518 int dimValue2 = rawDoc.dims2[dim];
519 if (dimValue2 != -1) {
520 if (doUseDV) {
521 doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
522 } else {
523 doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
524 }
525 doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
526 if (VERBOSE) {
527 System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
528 }
529 }
530 }
531
532 w.addDocument(config.build(tw, doc));
533 }
534
535 if (random().nextBoolean()) {
536
537 int numDel = TestUtil.nextInt(random(), 1, (int) (numDocs * 0.05));
538 if (VERBOSE) {
539 System.out.println("delete " + numDel);
540 }
541 int delCount = 0;
542 while (delCount < numDel) {
543 Doc doc = docs.get(random().nextInt(docs.size()));
544 if (!doc.deleted) {
545 if (VERBOSE) {
546 System.out.println(" delete id=" + doc.id);
547 }
548 doc.deleted = true;
549 w.deleteDocuments(new Term("id", doc.id));
550 delCount++;
551 }
552 }
553 }
554
555 if (random().nextBoolean()) {
556 if (VERBOSE) {
557 System.out.println("TEST: forceMerge(1)...");
558 }
559 w.forceMerge(1);
560 }
561 IndexReader r = w.getReader();
562
563 final SortedSetDocValuesReaderState sortedSetDVState;
564 IndexSearcher s = newSearcher(r);
565
566 if (doUseDV) {
567 sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
568 } else {
569 sortedSetDVState = null;
570 }
571
572 if (VERBOSE) {
573 System.out.println("r.numDocs() = " + r.numDocs());
574 }
575
576
577 TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
578
579 int numIters = atLeast(10);
580
581 for(int iter=0;iter<numIters;iter++) {
582
583 String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
584 int numDrillDown = TestUtil.nextInt(random(), 1, Math.min(4, numDims));
585 if (VERBOSE) {
586 System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
587 }
588
589 String[][] drillDowns = new String[numDims][];
590
591 int count = 0;
592 boolean anyMultiValuedDrillDowns = false;
593 while (count < numDrillDown) {
594 int dim = random().nextInt(numDims);
595 if (drillDowns[dim] == null) {
596 if (random().nextBoolean()) {
597
598 drillDowns[dim] = new String[] {dimValues[dim][random().nextInt(dimValues[dim].length)]};
599 } else {
600 int orCount = TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
601 drillDowns[dim] = new String[orCount];
602 anyMultiValuedDrillDowns |= orCount > 1;
603 for(int i=0;i<orCount;i++) {
604 while (true) {
605 String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
606 for(int j=0;j<i;j++) {
607 if (value.equals(drillDowns[dim][j])) {
608 value = null;
609 break;
610 }
611 }
612 if (value != null) {
613 drillDowns[dim][i] = value;
614 break;
615 }
616 }
617 }
618 }
619 if (VERBOSE) {
620 BytesRef[] values = new BytesRef[drillDowns[dim].length];
621 for(int i=0;i<values.length;i++) {
622 values[i] = new BytesRef(drillDowns[dim][i]);
623 }
624 System.out.println(" dim" + dim + "=" + Arrays.toString(values));
625 }
626 count++;
627 }
628 }
629
630 Query baseQuery;
631 if (contentToken == null) {
632 baseQuery = new MatchAllDocsQuery();
633 } else {
634 baseQuery = new TermQuery(new Term("content", contentToken));
635 }
636
637 DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
638
639 for(int dim=0;dim<numDims;dim++) {
640 if (drillDowns[dim] != null) {
641 for(String value : drillDowns[dim]) {
642 ddq.add("dim" + dim, value);
643 }
644 }
645 }
646
647 Query filter;
648 if (random().nextInt(7) == 6) {
649 if (VERBOSE) {
650 System.out.println(" only-even filter");
651 }
652 filter = new Query() {
653
654 @Override
655 public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
656 return new RandomAccessWeight(this) {
657 @Override
658 protected Bits getMatchingDocs(final LeafReaderContext context) throws IOException {
659 return new Bits() {
660
661 @Override
662 public boolean get(int docID) {
663 try {
664 return (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0;
665 } catch (NumberFormatException | IOException e) {
666 throw new RuntimeException(e);
667 }
668 }
669
670 @Override
671 public int length() {
672 return context.reader().maxDoc();
673 }
674
675 };
676 }
677 };
678 }
679
680 @Override
681 public String toString(String field) {
682 return "drillSidewaysTestFilter";
683 }
684
685 };
686 } else {
687 filter = null;
688 }
689
690
691
692
693 new DrillSideways(s, config, tr).search(ddq,
694 new SimpleCollector() {
695 int lastDocID;
696
697 @Override
698 public void collect(int doc) {
699 assert doc > lastDocID;
700 lastDocID = doc;
701 }
702
703 @Override
704 protected void doSetNextReader(LeafReaderContext context) throws IOException {
705 lastDocID = -1;
706 }
707
708 @Override
709 public boolean needsScores() {
710 return false;
711 }
712 });
713
714
715
716
717 if (!anyMultiValuedDrillDowns) {
718
719
720
721
722 new DrillSideways(s, config, tr) {
723 @Override
724 protected boolean scoreSubDocsAtOnce() {
725 return true;
726 }
727 }.search(ddq, new AssertingSubDocsAtOnceCollector());
728 }
729
730 TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
731
732 Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
733 DrillSideways ds;
734 if (doUseDV) {
735 ds = new DrillSideways(s, config, sortedSetDVState);
736 } else {
737 ds = new DrillSideways(s, config, tr) {
738 @Override
739 protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
740 Map<String,Facets> drillSidewaysFacets = new HashMap<>();
741 Facets drillDownFacets = getTaxonomyFacetCounts(taxoReader, config, drillDowns);
742 if (drillSideways != null) {
743 for(int i=0;i<drillSideways.length;i++) {
744 drillSidewaysFacets.put(drillSidewaysDims[i],
745 getTaxonomyFacetCounts(taxoReader, config, drillSideways[i]));
746 }
747 }
748
749 if (drillSidewaysFacets.isEmpty()) {
750 return drillDownFacets;
751 } else {
752 return new MultiFacets(drillSidewaysFacets, drillDownFacets);
753 }
754
755 }
756 };
757 }
758
759
760 DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true);
761
762 TopDocs hits = s.search(baseQuery, numDocs);
763 Map<String,Float> scores = new HashMap<>();
764 for(ScoreDoc sd : hits.scoreDocs) {
765 scores.put(s.doc(sd.doc).get("id"), sd.score);
766 }
767 if (VERBOSE) {
768 System.out.println(" verify all facets");
769 }
770 verifyEquals(dimValues, s, expected, actual, scores, doUseDV);
771
772
773 Query q = ddq;
774 if (filter != null) {
775 q = new BooleanQuery.Builder()
776 .add(q, Occur.MUST)
777 .add(filter, Occur.FILTER)
778 .build();
779 }
780 TopDocs ddqHits = s.search(q, numDocs);
781 assertEquals(expected.hits.size(), ddqHits.totalHits);
782 for(int i=0;i<expected.hits.size();i++) {
783
784 assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
785 }
786 }
787
788 w.close();
789 IOUtils.close(r, tr, tw, d, td);
790 }
791
792 private static class Counters {
793 int[][] counts;
794
795 public Counters(String[][] dimValues) {
796 counts = new int[dimValues.length][];
797 for(int dim=0;dim<dimValues.length;dim++) {
798 counts[dim] = new int[dimValues[dim].length];
799 }
800 }
801
802 public void inc(int[] dims, int[] dims2) {
803 inc(dims, dims2, -1);
804 }
805
806 public void inc(int[] dims, int[] dims2, int onlyDim) {
807 assert dims.length == counts.length;
808 assert dims2.length == counts.length;
809 for(int dim=0;dim<dims.length;dim++) {
810 if (onlyDim == -1 || dim == onlyDim) {
811 if (dims[dim] != -1) {
812 counts[dim][dims[dim]]++;
813 }
814 if (dims2[dim] != -1 && dims2[dim] != dims[dim]) {
815 counts[dim][dims2[dim]]++;
816 }
817 }
818 }
819 }
820 }
821
822 private static class TestFacetResult {
823 List<Doc> hits;
824 int[][] counts;
825 int[] uniqueCounts;
826 public TestFacetResult() {}
827 }
828
829 private int[] getTopNOrds(final int[] counts, final String[] values, int topN) {
830 final int[] ids = new int[counts.length];
831 for(int i=0;i<ids.length;i++) {
832 ids[i] = i;
833 }
834
835
836
837 new InPlaceMergeSorter() {
838
839 @Override
840 protected void swap(int i, int j) {
841 int id = ids[i];
842 ids[i] = ids[j];
843 ids[j] = id;
844 }
845
846 @Override
847 protected int compare(int i, int j) {
848 int counti = counts[ids[i]];
849 int countj = counts[ids[j]];
850
851 if (counti > countj) {
852 return -1;
853 } else if (counti < countj) {
854 return 1;
855 } else {
856
857 return new BytesRef(values[ids[i]]).compareTo(new BytesRef(values[ids[j]]));
858 }
859 }
860
861 }.sort(0, ids.length);
862
863 if (topN > ids.length) {
864 topN = ids.length;
865 }
866
867 int numSet = topN;
868 for(int i=0;i<topN;i++) {
869 if (counts[ids[i]] == 0) {
870 numSet = i;
871 break;
872 }
873 }
874
875 int[] topNIDs = new int[numSet];
876 System.arraycopy(ids, 0, topNIDs, 0, topNIDs.length);
877 return topNIDs;
878 }
879
880 private TestFacetResult slowDrillSidewaysSearch(IndexSearcher s, List<Doc> docs,
881 String contentToken, String[][] drillDowns,
882 String[][] dimValues, Query onlyEven) throws Exception {
883 int numDims = dimValues.length;
884
885 List<Doc> hits = new ArrayList<>();
886 Counters drillDownCounts = new Counters(dimValues);
887 Counters[] drillSidewaysCounts = new Counters[dimValues.length];
888 for(int dim=0;dim<numDims;dim++) {
889 drillSidewaysCounts[dim] = new Counters(dimValues);
890 }
891
892 if (VERBOSE) {
893 System.out.println(" compute expected");
894 }
895
896 nextDoc: for(Doc doc : docs) {
897 if (doc.deleted) {
898 continue;
899 }
900 if (onlyEven != null & (Integer.parseInt(doc.id) & 1) != 0) {
901 continue;
902 }
903 if (contentToken == null || doc.contentToken.equals(contentToken)) {
904 int failDim = -1;
905 for(int dim=0;dim<numDims;dim++) {
906 if (drillDowns[dim] != null) {
907 String docValue = doc.dims[dim] == -1 ? null : dimValues[dim][doc.dims[dim]];
908 String docValue2 = doc.dims2[dim] == -1 ? null : dimValues[dim][doc.dims2[dim]];
909 boolean matches = false;
910 for(String value : drillDowns[dim]) {
911 if (value.equals(docValue) || value.equals(docValue2)) {
912 matches = true;
913 break;
914 }
915 }
916 if (!matches) {
917 if (failDim == -1) {
918
919 failDim = dim;
920 } else {
921
922 continue nextDoc;
923 }
924 }
925 }
926 }
927
928 if (failDim == -1) {
929 if (VERBOSE) {
930 System.out.println(" exp: id=" + doc.id + " is a hit");
931 }
932
933 hits.add(doc);
934 drillDownCounts.inc(doc.dims, doc.dims2);
935 for(int dim=0;dim<dimValues.length;dim++) {
936 drillSidewaysCounts[dim].inc(doc.dims, doc.dims2);
937 }
938 } else {
939 if (VERBOSE) {
940 System.out.println(" exp: id=" + doc.id + " is a near-miss on dim=" + failDim);
941 }
942 drillSidewaysCounts[failDim].inc(doc.dims, doc.dims2, failDim);
943 }
944 }
945 }
946
947 Map<String,Integer> idToDocID = new HashMap<>();
948 for(int i=0;i<s.getIndexReader().maxDoc();i++) {
949 idToDocID.put(s.doc(i).get("id"), i);
950 }
951
952 Collections.sort(hits);
953
954 TestFacetResult res = new TestFacetResult();
955 res.hits = hits;
956 res.counts = new int[numDims][];
957 res.uniqueCounts = new int[numDims];
958 for (int dim = 0; dim < numDims; dim++) {
959 if (drillDowns[dim] != null) {
960 res.counts[dim] = drillSidewaysCounts[dim].counts[dim];
961 } else {
962 res.counts[dim] = drillDownCounts.counts[dim];
963 }
964 int uniqueCount = 0;
965 for (int j = 0; j < res.counts[dim].length; j++) {
966 if (res.counts[dim][j] != 0) {
967 uniqueCount++;
968 }
969 }
970 res.uniqueCounts[dim] = uniqueCount;
971 }
972
973 return res;
974 }
975
976 void verifyEquals(String[][] dimValues, IndexSearcher s, TestFacetResult expected,
977 DrillSidewaysResult actual, Map<String,Float> scores, boolean isSortedSetDV) throws Exception {
978 if (VERBOSE) {
979 System.out.println(" verify totHits=" + expected.hits.size());
980 }
981 assertEquals(expected.hits.size(), actual.hits.totalHits);
982 assertEquals(expected.hits.size(), actual.hits.scoreDocs.length);
983 for(int i=0;i<expected.hits.size();i++) {
984 if (VERBOSE) {
985 System.out.println(" hit " + i + " expected=" + expected.hits.get(i).id);
986 }
987 assertEquals(expected.hits.get(i).id,
988 s.doc(actual.hits.scoreDocs[i].doc).get("id"));
989
990 assertEquals(scores.get(expected.hits.get(i).id), actual.hits.scoreDocs[i].score, 0.0f);
991 }
992
993 for(int dim=0;dim<expected.counts.length;dim++) {
994 int topN = random().nextBoolean() ? dimValues[dim].length : TestUtil.nextInt(random(), 1, dimValues[dim].length);
995 FacetResult fr = actual.facets.getTopChildren(topN, "dim"+dim);
996 if (VERBOSE) {
997 System.out.println(" dim" + dim + " topN=" + topN + " (vs " + dimValues[dim].length + " unique values)");
998 System.out.println(" actual");
999 }
1000
1001 int idx = 0;
1002 Map<String,Integer> actualValues = new HashMap<>();
1003
1004 if (fr != null) {
1005 for(LabelAndValue labelValue : fr.labelValues) {
1006 actualValues.put(labelValue.label, labelValue.value.intValue());
1007 if (VERBOSE) {
1008 System.out.println(" " + idx + ": " + new BytesRef(labelValue.label) + ": " + labelValue.value);
1009 idx++;
1010 }
1011 }
1012 assertEquals("dim=" + dim, expected.uniqueCounts[dim], fr.childCount);
1013 }
1014
1015 if (topN < dimValues[dim].length) {
1016 int[] topNIDs = getTopNOrds(expected.counts[dim], dimValues[dim], topN);
1017 if (VERBOSE) {
1018 idx = 0;
1019 System.out.println(" expected (sorted)");
1020 for(int i=0;i<topNIDs.length;i++) {
1021 int expectedOrd = topNIDs[i];
1022 String value = dimValues[dim][expectedOrd];
1023 System.out.println(" " + idx + ": " + new BytesRef(value) + ": " + expected.counts[dim][expectedOrd]);
1024 idx++;
1025 }
1026 }
1027 if (VERBOSE) {
1028 System.out.println(" topN=" + topN + " expectedTopN=" + topNIDs.length);
1029 }
1030
1031 if (fr != null) {
1032 assertEquals(topNIDs.length, fr.labelValues.length);
1033 } else {
1034 assertEquals(0, topNIDs.length);
1035 }
1036 for(int i=0;i<topNIDs.length;i++) {
1037 int expectedOrd = topNIDs[i];
1038 assertEquals(expected.counts[dim][expectedOrd], fr.labelValues[i].value.intValue());
1039 if (isSortedSetDV) {
1040
1041
1042 assertEquals("value @ idx=" + i, dimValues[dim][expectedOrd], fr.labelValues[i].label);
1043 }
1044 }
1045 } else {
1046
1047 if (VERBOSE) {
1048 idx = 0;
1049 System.out.println(" expected (unsorted)");
1050 for(int i=0;i<dimValues[dim].length;i++) {
1051 String value = dimValues[dim][i];
1052 if (expected.counts[dim][i] != 0) {
1053 System.out.println(" " + idx + ": " + new BytesRef(value) + ": " + expected.counts[dim][i]);
1054 idx++;
1055 }
1056 }
1057 }
1058
1059 int setCount = 0;
1060 for(int i=0;i<dimValues[dim].length;i++) {
1061 String value = dimValues[dim][i];
1062 if (expected.counts[dim][i] != 0) {
1063 assertTrue(actualValues.containsKey(value));
1064 assertEquals(expected.counts[dim][i], actualValues.get(value).intValue());
1065 setCount++;
1066 } else {
1067 assertFalse(actualValues.containsKey(value));
1068 }
1069 }
1070 assertEquals(setCount, actualValues.size());
1071 }
1072 }
1073 }
1074
1075 public void testEmptyIndex() throws Exception {
1076
1077 Directory dir = newDirectory();
1078 Directory taxoDir = newDirectory();
1079 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
1080 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
1081 IndexSearcher searcher = newSearcher(writer.getReader());
1082 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
1083
1084
1085 FacetsConfig config = new FacetsConfig();
1086 DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
1087 DrillDownQuery ddq = new DrillDownQuery(config);
1088 ddq.add("Author", "Lisa");
1089
1090 DrillSidewaysResult r = ds.search(ddq, 10);
1091 assertEquals(0, r.hits.totalHits);
1092
1093 r = ds.search(ddq, null, null, 10, new Sort(new SortField("foo", SortField.Type.INT)), false, false);
1094 assertEquals(0, r.hits.totalHits);
1095
1096 writer.close();
1097 IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir);
1098 }
1099
1100 public void testScorer() throws Exception {
1101
1102 Directory dir = newDirectory();
1103 Directory taxoDir = newDirectory();
1104
1105
1106
1107 DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
1108
1109 FacetsConfig config = new FacetsConfig();
1110
1111 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
1112
1113 Document doc = new Document();
1114 doc.add(newTextField("field", "foo bar", Field.Store.NO));
1115 doc.add(new FacetField("Author", "Bob"));
1116 doc.add(new FacetField("dim", "a"));
1117 writer.addDocument(config.build(taxoWriter, doc));
1118
1119
1120 IndexSearcher searcher = newSearcher(writer.getReader());
1121
1122
1123 TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
1124
1125 DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
1126
1127 BooleanQuery.Builder bq = new BooleanQuery.Builder();
1128 bq.setDisableCoord(true);
1129 bq.add(new TermQuery(new Term("field", "foo")), BooleanClause.Occur.MUST);
1130 bq.add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST_NOT);
1131 DrillDownQuery ddq = new DrillDownQuery(config, bq.build());
1132 ddq.add("field", "foo");
1133 ddq.add("author", bq.build());
1134 ddq.add("dim", bq.build());
1135 DrillSidewaysResult r = ds.search(null, ddq, 10);
1136 assertEquals(0, r.hits.totalHits);
1137
1138 writer.close();
1139 IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
1140 }
1141 }
1142